
from lxml import etree


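# Example 1 (disabled): parse a single XML element with etree.XML and
# inspect its tag, text and attributes.
# xml_str = """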
# <bookstore id='a101' title='abc'>
#   hello world
# </bookstore>
# """
#
# tree = etree.XML(xml_str)
# print(tree,type(tree))
# print(tree.tag,tree.text,tree.attrib)


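# Example 2 (disabled): navigate child elements of a parsed XML document
# with find() (first match) and findall() (all matches).
# xml_str = """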
# <bookstore>
#   <book category="cooking1">
#     <title lang="en">Everyday Italian1</title>
#     <author>Giada De Laurentiis1</author>
#     <year>20051</year>
#     <price>30.00</price>
#   </book>
#   <book category="cooking2">
#     <title lang="en">Everyday Italian2</title>
#     <author>Giada De Laurentiis2</author>
#     <year>20052</year>
#     <price>60.00</price>
#   </book>
# </bookstore>
# """
#
# tree = etree.XML(xml_str)
# print(tree, type(tree))
#
#
# print(tree.find("book"))
# print(tree.find("book").attrib)
#
# title = tree.find("book/title")
# print(title.attrib)
#
# books = tree.findall("book")
# for b in books:
#     print(b.find("title").text)

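# Example 3 (disabled): parse an HTML document with etree.HTML and walk it
# with find()/findall() using paths relative to the root <html> element.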
# html_str = """
# <!DOCTYPE html>
# <html lang="en">
# <head>
#     <meta charset="UTF-8">
#     <title>Title</title>
# </head>
# <body>
# <ul>
#     <li id="li1">li1</li>
#     <li id="li2">li2</li>
#     <li id="li3">li3</li>
# </ul>
# </body>
# </html>
# """

# tree = etree.HTML(html_str)
#
# print(tree,type(tree))
#
# print(tree.tag)
#
# book = tree.find("head/title")
# print(book)
# items = tree.findall("body/ul/li")
# for item in items:
#     print(item.text)



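# Example 4 (disabled): query the parsed HTML with CSS selectors through
# cssselect() (lxml needs the separate `cssselect` package for this).
# html_str = """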
# <!DOCTYPE html>
# <html lang="en">
# <head>
#     <meta charset="UTF-8">
#     <title>Title</title>
# </head>
# <body>
# <ul>
#     <li id="li1">li1</li>
#     <li id="li2">li2</li>
#     <li id="li3">li3</li>
# </ul>
# </body>
# </html>
# """

# tree = etree.HTML(html_str)
#
# title = tree.cssselect("head title")
# print(type(title),title[0].text)
#
# lis = tree.cssselect("ul li")
# print(type(lis))
# for li in lis:
#     print(li.text)



# Sample HTML page queried by the XPath expressions below.
html_str = """
<!DOCTYPE html>
<html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Title</title>
    </head>
    <body>
        <ul>
            <li id="li1">li1</li>
            <li id="li2">li2</li>
            <li id="li3">li3</li>
        </ul>
    </body>
</html>
"""

# Parse the markup with lxml's HTML parser; every query below runs on `tree`.
tree = etree.HTML(html_str)

# "/html": absolute path from the document root to the <html> element.
w1 = tree.xpath("/html")
root_match = w1[0]
print(w1, root_match.tag, type(w1))

# "//title": every <title> element, wherever it sits in the document.
w2 = tree.xpath("//title")
title_anywhere = w2[0]
print(w2, title_anywhere.text)

# "/html/head/title": the same element, reached through its full path.
w3 = tree.xpath("/html/head/title")
title_by_path = w3[0]
print(w3, title_by_path.text)

# "//ul/li": all <li> elements that are direct children of a <ul>.
w4 = tree.xpath("//ul/li")
print(w4)

# "//ul/li[@id]": as above, restricted to <li> elements carrying an id
# attribute. `w4` is deliberately rebound, as in the original script.
w4 = tree.xpath("//ul/li[@id]")
first_li_with_id = w4[0]
print(w4, first_li_with_id.text)


